Loading data

We're going to look at the `ny_noaa` weather data.

library(tidyverse)
library(p8105.datasets)
library(plotly)
library(dplyr)  
data("ny_noaa")

# Tidy the raw records: split `date` on "-" into integer year / month / day
# columns, and convert tmax / tmin to numeric values divided by 10
# (presumably the raw values are tenths of a degree -- confirm against the
# dataset documentation).
ny_noaa_fixed = ny_noaa %>%
  separate(date, into = c("year", "month", "day"), sep = "-") %>%
  mutate(
    year = as.integer(year),
    month = as.integer(month),
    day = as.integer(day),
    tmax = as.numeric(tmax) / 10,
    tmin = as.numeric(tmin) / 10
  )

# Preview only the records from years after 2009.
filter(ny_noaa_fixed, year > 2009)
## # A tibble: 159,671 x 9
##    id           year month   day  prcp  snow  snwd  tmax  tmin
##    <chr>       <int> <int> <int> <int> <int> <int> <dbl> <dbl>
##  1 US1NYAB0001  2010     1     1    33    28    NA    NA    NA
##  2 US1NYAB0001  2010     1     2    28    84    NA    NA    NA
##  3 US1NYAB0001  2010     1     3    56    58    NA    NA    NA
##  4 US1NYAB0001  2010     1     4    38    38    NA    NA    NA
##  5 US1NYAB0001  2010     1     5     0     0    NA    NA    NA
##  6 US1NYAB0001  2010     1     6     0     0    NA    NA    NA
##  7 US1NYAB0001  2010     1     7     0     0    NA    NA    NA
##  8 US1NYAB0001  2010     1     8     0     0    NA    NA    NA
##  9 US1NYAB0001  2010     1     9    36    51    NA    NA    NA
## 10 US1NYAB0001  2010     1    10     0     0    NA    NA    NA
## # … with 159,661 more rows
# Half the number of post-2009 rows, computed from the data rather than from a
# row count copied by hand out of the printed tibble.
nrow(filter(ny_noaa_fixed, year > 2009)) / 2
## [1] 79835.5

Plotly plots

scatterplot

# Random 500-row subset of the tidied data. The plot below does not use it;
# it is kept so the name remains available to any later code.
smaller_df = sample_n(ny_noaa_fixed, 500)

# Interactive scatterplot of tmin against tmax for years after 2009, coloured
# by month, with a hover label giving the month and year of each point.
ny_noaa_fixed %>%
  filter(year > 2009) %>%
  # Drop rows without both temperatures up front; plotly would otherwise
  # discard them itself and emit an "Ignoring ... observations" warning.
  filter(!is.na(tmax), !is.na(tmin)) %>%
  mutate(text_label = str_c("Month: ", month, "\nYear: ", year)) %>%
  plot_ly(
    x = ~tmax, y = ~tmin, color = ~factor(month), text = ~text_label,
    # Twelve month levels exceed the 8-colour maximum of the default Set2
    # palette, so use viridis, which interpolates to any number of levels.
    colors = "viridis",
    alpha = .5, type = "scatter", mode = "markers")
## Warning: Ignoring 108204 observations
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

boxplot

# Box plots of daily maximum temperature for each year, comparing January
# with July.
ny_noaa_fixed %>%
  # Use `%in%` for set membership. `month == c(1, 7)` recycles the length-2
  # vector along the column, comparing odd rows with 1 and even rows with 7,
  # which silently drops roughly half of the January and July records.
  filter(month %in% c(1, 7)) %>%
  mutate(
    year = factor(year),
    # After the filter, month is either 1 or 7, so label it directly.
    month = if_else(month == 1, "January", "July")
  ) %>%
  plot_ly(
    y = ~year, x = ~tmax, color = ~month,
    type = "box", colors = "viridis") %>%
  layout(
    title = "NY Weather Over the Years in 2 Months ")
## Warning: Ignoring 95158 observations

bar plot

# Bar chart counting how often each positive daily snowfall value occurs in
# December through March for years after 2004.
ny_noaa_fixed %>%
  filter(
    year > 2004,
    month %in% c(12, 1, 2, 3),
    snow > 0
  ) %>%
  count(snow) %>%
  # Treat snowfall as a category so every observed value gets its own bar
  # and its own colour.
  mutate(snow = factor(snow)) %>%
  plot_ly(
    x = ~snow, y = ~n, color = ~snow,
    colors = "viridis", type = "bar"
  ) %>%
  layout(
    title = "Daily Snow fall (mm) in Winter Months of Years 2005-2010 "
  )

ggplotly

# Only `ny_noaa` is loaded explicitly earlier in this document, so load the
# Airbnb listings here to make sure `nyc_airbnb` exists before it is used.
data("nyc_airbnb")

# Static ggplot scatter of listing locations coloured by price.
ggp_scatter =
  nyc_airbnb %>%
  # Longitude belongs on the x axis and latitude on the y axis; the reverse
  # mapping draws the point cloud transposed relative to a map.
  ggplot(aes(x = long, y = lat, color = price)) +
  geom_point()

# Convert the ggplot object into an interactive plotly widget.
ggplotly(ggp_scatter)